/*******************************************************************************

Ryan Hill: ryan.hill@kellogg.northwestern.edu
Carolyn Stein: carolyn_stein@berkeley.edu
Last modified: December 2024

Inputs:		clean_survey.dta

Outputs:	Figures 10, 11, E9

Purpose: 	Analyze survey experiment data

*******************************************************************************/

clear all
use "${data_clean}/clean_survey.dta"

* Sample restriction: must have answered all questions.
* missing() is true for "." and for the extended missing codes .a-.z on the
* numeric items (competition, maturation) and for "" on the string items
* (replicate ... litReview), so a single loop handles both variable types
* and no respondent with an extended missing code slips through.
foreach q in competition maturation replicate additionalExp codeReview 	///
	mathReview proofread litReview {
	drop if missing(`q')
}

/*------------------------------------------------------------------------------

	Figure 10: Survey Experiment Results: PDB Respondents

------------------------------------------------------------------------------*/	
preserve
keep if field == 0

* Results matrix layout: row 1 = low-treatment arm, row 2 = high-treatment arm.
* Each outcome occupies three consecutive columns: point estimate, CI lower
* bound, CI upper bound (competition + 8 further outcomes = 27 columns).
matrix pdb_results = J(2,27,.)
local row = 1
local col = 1

* Effect of potential on competition
reg competition highPotential, r

	* p-value of the difference between arms (coefficient on highPotential),
	* taken from regress' r(table) before lincom is run
	local p_competition = round(r(table)[4,1], 0.001)

	* Low potential arm: the constant and its confidence bounds
	matrix pdb_results[`row', `col']     = _b[_cons]
	matrix pdb_results[`row', `col' + 1] = r(table)[5,2]
	matrix pdb_results[`row', `col' + 2] = r(table)[6,2]

	* High potential arm: constant + treatment coefficient, with the
	* confidence bounds reported by lincom
	lincom highPotential + _cons
	matrix pdb_results[`row' + 1, `col']     = r(estimate)
	matrix pdb_results[`row' + 1, `col' + 1] = r(lb)
	matrix pdb_results[`row' + 1, `col' + 2] = r(ub)

	* Move on to the next outcome's three columns; row pointer stays at 1
	local col = `col' + 3
	local row = 1

* Effect of competition
* One regression per outcome. Each outcome fills the next three columns of
* pdb_results (estimate, CI lower bound, CI upper bound), with the low
* competition arm in row `row' and the high competition arm in the row below.
local pdb_outcomes maturation replicateScore additionalExpScore 			///
	codeReviewScore mathReviewScore proofreadScore litReviewScore 			///
	qualityIndexScore

foreach y of local pdb_outcomes {

	reg `y' highCompetition, r

	* p-value of the difference between arms, kept in p_<outcome> for the
	* figure annotations
	local p_`y' = round(r(table)[4,1], 0.0001)

	* Low competition arm: the constant and its confidence bounds
	matrix pdb_results[`row', `col']     = _b[_cons]
	matrix pdb_results[`row', `col' + 1] = r(table)[5,2]
	matrix pdb_results[`row', `col' + 2] = r(table)[6,2]

	* High competition arm: constant + treatment coefficient via lincom
	lincom highCompetition + _cons
	matrix pdb_results[`row' + 1, `col']     = r(estimate)
	matrix pdb_results[`row' + 1, `col' + 1] = r(lb)
	matrix pdb_results[`row' + 1, `col' + 2] = r(ub)

	* Next outcome starts three columns to the right
	local col = `col' + 3
	local row = 1

}

* Turn estimates into graphs: svmat loads the 2 x 27 matrix as variables
* pdb_results1-pdb_results27, one observation per treatment arm
clear
svmat pdb_results

* Bar positions on the x axis: 1.5 for the first (low) arm, 3 for the second
gen x = _n*1.5

* Effect of potential on competition (columns 1-3 of the results matrix)
rename (pdb_results1 pdb_results2 pdb_results3) (comp_coef comp_lb comp_ub)

* Layers: CI caps, bars, then a bracket over the two bars built from a
* horizontal line at y = 90 and droplines from its endpoints down to y = 88
twoway 																		///
	(rcap comp_lb comp_ub x, lcolor(navy)) 									///
	(bar comp_coef x, fcolor(navy%30) lcolor(none)) 						///
	(scatteri 90 1.5 90 3, recast(line) mcolor(none) lcolor(black) 			///
		lpattern(solid) lwidth(thin)) 										///
	(scatteri 90 1.5 90 3, recast(dropline) base(88) lwidth(thin) 			///
		mcolor(none) lcolor(black) lpattern(solid)), 						///
	ylab(0(20)100, nogrid) ytitle("Probability of competitor (%)") 			///
	xtitle("") xlab(1.5 "Low potential" 3 "High potential",nogrid) 			///
	legend(off) text(95 2.25 "p-value = `p_competition'", size(small)) 		///
	title("Panel A: Effect of high potential on competition") 				///
	name(comp, replace)

* Effect of competition on maturation (columns 4-6 of the results matrix)
rename (pdb_results4 pdb_results5 pdb_results6) (mat_coef mat_lb mat_ub)

* Layers: CI caps, bars, then a bracket (line at y = 19.5 with droplines to
* y = 19.2) under the p-value label
twoway 																		///
	(rcap mat_lb mat_ub x, lcolor(navy)) 									///
	(bar mat_coef x, fcolor(navy%30) lcolor(none)) 							///
	(scatteri 19.5 1.5 19.5 3, recast(line) mcolor(none) lcolor(black) 		///
		lpattern(solid) lwidth(thin)) 										///
	(scatteri 19.5 1.5 19.5 3, recast(dropline) base(19.2) lwidth(thin) 	///
		mcolor(none) lcolor(black) lpattern(solid)), 						///
	ylab(0(4)22, nogrid) ytitle("Months to complete") xtitle("") 			///
	xlab(1.5 "Low competition" 3 "High competition",nogrid) 				///
	legend(off) text(21 2.25 "p-value = `p_maturation'", size(small)) 		///
	title("Panel B: Effect of high competition on maturation") 				///
	name(mat, replace)

* Effect of competition on quality index (columns 25-27 of the results matrix)
rename (pdb_results25 pdb_results26 pdb_results27) 							///
	(index_coef index_lb index_ub)

* Layers: CI caps, bars, then a bracket (line at y = 0.98 with droplines to
* y = 0.97) under the p-value label
twoway 																		///
	(rcap index_lb index_ub x, lcolor(navy)) 								///
	(bar index_coef x, fcolor(navy%30) lcolor(none)) 						///
	(scatteri 0.98 1.5 0.98 3, recast(line) mcolor(none) lcolor(black) 		///
		lpattern(solid) lwidth(thin)) 										///
	(scatteri 0.98 1.5 0.98 3, recast(dropline) base(0.97) lwidth(thin) 	///
		mcolor(none) lcolor(black) lpattern(solid)), 						///
	ylab(0.6(0.1)1, nogrid) ytitle("Quality index") xtitle("") 				///
	xlab(1.5 "Low competition" 3 "High competition",nogrid) 				///
	legend(off) 															///
	text(1 2.25 "p-value = `p_qualityIndexScore'", size(small)) 			///
	name(qual, replace) 													///
	title("Panel C: Effect of high competition on quality")

* Assemble the three named panels side by side and write Figure 10 to disk
graph combine comp mat qual, row(1) xsize(6) ysize(3)
graph save "${figures}figure10.gph", replace
graph export "${figures}figure10.pdf", replace

/*------------------------------------------------------------------------------

	Figure E9: Survey Experiment Results: Individual Quality Measures

------------------------------------------------------------------------------*/

* Effect of competition on replicating main finding (results columns 7-9)
rename (pdb_results7 pdb_results8 pdb_results9) 							///
	(replicate_coef replicate_lb replicate_ub)

* Layers: CI caps, bars, then a bracket (line at y = 0.98 with droplines to
* y = 0.97) under the p-value label
twoway 																		///
	(rcap replicate_lb replicate_ub x, lcolor(navy)) 						///
	(bar replicate_coef x, fcolor(navy%30) lcolor(none)) 					///
	(scatteri 0.98 1.5 0.98 3, recast(line) mcolor(none) lcolor(black) 		///
		lpattern(solid) lwidth(thin)) 										///
	(scatteri 0.98 1.5 0.98 3, recast(dropline) base(0.97) lwidth(thin) 	///
		mcolor(none) lcolor(black) lpattern(solid)), 						///
	ylab(0.6(0.1)1.05, nogrid) ytitle("Replicate main experiment") 			///
	xtitle("") xlab(1.5 "Low competition" 3 "High competition",nogrid) 		///
	legend(off) text(1 2.25 "p-value = `p_replicateScore'", size(small)) 	///
	name(replicate, replace) 												///
	title("Panel A: Effect of high competition on replicating main experiment")

* Effect of competition on running additional experiments (results columns 10-12)
rename (pdb_results10 pdb_results11 pdb_results12) 							///
	(additional_coef additional_lb additional_ub)

* Layers: CI caps, bars, then a bracket (line at y = 0.98 with droplines to
* y = 0.97) under the p-value label
twoway 																		///
	(rcap additional_lb additional_ub x, lcolor(navy)) 						///
	(bar additional_coef x, fcolor(navy%30) lcolor(none)) 					///
	(scatteri 0.98 1.5 0.98 3, recast(line) mcolor(none) lcolor(black) 		///
		lpattern(solid) lwidth(thin)) 										///
	(scatteri 0.98 1.5 0.98 3, recast(dropline) base(0.97) lwidth(thin) 	///
		mcolor(none) lcolor(black) lpattern(solid)), 						///
	ylab(0.6(0.1)1.05, nogrid) ytitle("Run additional experiments") 		///
	xtitle("") xlab(1.5 "Low competition" 3 "High competition",nogrid) 		///
	legend(off) 															///
	text(1 2.25 "p-value = `p_additionalExpScore'", size(small)) 			///
	name(additional, replace) 												///
	title("Panel B: Effect of high competition on running additional experiments")

* Effect of competition on carefully checking code (results columns 13-15)
rename (pdb_results13 pdb_results14 pdb_results15) 							///
	(code_coef code_lb code_ub)

* Layers: CI caps, bars, then a bracket (line at y = 0.98 with droplines to
* y = 0.97) under the p-value label
twoway 																		///
	(rcap code_lb code_ub x, lcolor(navy)) 									///
	(bar code_coef x, fcolor(navy%30) lcolor(none)) 						///
	(scatteri 0.98 1.5 0.98 3, recast(line) mcolor(none) lcolor(black) 		///
		lpattern(solid) lwidth(thin)) 										///
	(scatteri 0.98 1.5 0.98 3, recast(dropline) base(0.97) lwidth(thin) 	///
		mcolor(none) lcolor(black) lpattern(solid)), 						///
	ylab(0.6(0.1)1.05, nogrid) ytitle("Perform code review") xtitle("") 	///
	xlab(1.5 "Low competition" 3 "High competition",nogrid) 				///
	legend(off) text(1 2.25 "p-value = `p_codeReviewScore'", size(small)) 	///
	name(code_review, replace) 												///
	title("Panel C: Effect of high competition on performing code review")

* Effect of competition on carefully checking analytic work (results columns 16-18)
rename (pdb_results16 pdb_results17 pdb_results18) 							///
	(math_coef math_lb math_ub)

* Layers: CI caps, bars, then a bracket (line at y = 0.98 with droplines to
* y = 0.97) under the p-value label
twoway 																		///
	(rcap math_lb math_ub x, lcolor(navy)) 									///
	(bar math_coef x, fcolor(navy%30) lcolor(none)) 						///
	(scatteri 0.98 1.5 0.98 3, recast(line) mcolor(none) lcolor(black) 		///
		lpattern(solid) lwidth(thin)) 										///
	(scatteri 0.98 1.5 0.98 3, recast(dropline) base(0.97) lwidth(thin) 	///
		mcolor(none) lcolor(black) lpattern(solid)), 						///
	ylab(0.6(0.1)1.05, nogrid) ytitle("Perform analytic review") 			///
	xtitle("") xlab(1.5 "Low competition" 3 "High competition",nogrid) 		///
	legend(off) text(1 2.25 "p-value = `p_mathReviewScore'", size(small)) 	///
	name(analytic_review, replace) 											///
	title("Panel D: Effect of high competition on performing analytic review")

* Effect of competition on proofreading (results columns 19-21)
rename (pdb_results19 pdb_results20 pdb_results21) 							///
	(proof_coef proof_lb proof_ub)

* Layers: CI caps, bars, then a bracket. In this panel the bracket and label
* sit higher than in the other panels: line at y = 1.02 with droplines to
* y = 1.01, label at y = 1.04.
twoway 																		///
	(rcap proof_lb proof_ub x, lcolor(navy)) 								///
	(bar proof_coef x, fcolor(navy%30) lcolor(none)) 						///
	(scatteri 1.02 1.5 1.02 3, recast(line) mcolor(none) lcolor(black) 		///
		lpattern(solid) lwidth(thin)) 										///
	(scatteri 1.02 1.5 1.02 3, recast(dropline) base(1.01) lwidth(thin) 	///
		mcolor(none) lcolor(black) lpattern(solid)), 						///
	ylab(0.6(0.1)1.05, nogrid) ytitle("Proofread") xtitle("") 				///
	xlab(1.5 "Low competition" 3 "High competition",nogrid) 				///
	legend(off) 															///
	text(1.04 2.25 "p-value = `p_proofreadScore'", size(small)) 			///
	name(proof, replace) 													///
	title("Panel E: Effect of high competition on proofreading")

* Effect of competition on lit review (results columns 22-24)
rename (pdb_results22 pdb_results23 pdb_results24) (lit_coef lit_lb lit_ub)

* Layers: CI caps, bars, then a bracket (line at y = 0.98 with droplines to
* y = 0.97) under the p-value label
twoway 																		///
	(rcap lit_lb lit_ub x, lcolor(navy)) 									///
	(bar lit_coef x, fcolor(navy%30) lcolor(none)) 							///
	(scatteri 0.98 1.5 0.98 3, recast(line) mcolor(none) lcolor(black) 		///
		lpattern(solid) lwidth(thin)) 										///
	(scatteri 0.98 1.5 0.98 3, recast(dropline) base(0.97) lwidth(thin) 	///
		mcolor(none) lcolor(black) lpattern(solid)), 						///
	ylab(0.6(0.1)1.05, nogrid) ytitle("Careful literature review") 			///
	xtitle("") xlab(1.5 "Low competition" 3 "High competition",nogrid) 		///
	legend(off) text(1 2.25 "p-value = `p_litReviewScore'", size(small)) 	///
	name(lit_review, replace) 												///
	title("Panel F: Effect of high competition on performing careful lit review")

* Assemble the six named panels in a two-column grid and write Figure E9
graph combine replicate additional code_review analytic_review proof 		///
	lit_review, cols(2) xsize(10) ysize(12) iscale(.4)

graph save "${figures}figureE9.gph", replace
graph export "${figures}figureE9.pdf", replace

* Bring back the survey data that was preserved before the Figure 10 section
restore

/*------------------------------------------------------------------------------

	Figure 11: External Validity: Cross-Field Results

------------------------------------------------------------------------------*/

* Initialize matrix to store regression results.
* One row per field (0-10). Column 1 holds the field id; each outcome then
* takes four columns in order: treatment coefficient, CI lower bound,
* CI upper bound, and the outcome mean in the untreated arm
* (1 + 9 outcomes x 4 = 37 columns).
matrix results = J(11,37,.)
local row = 1
local col = 1

* Outcomes in the column order the plotting code expects
local fig11_outcomes competition maturation replicateScore 					///
	additionalExpScore codeReviewScore mathReviewScore proofreadScore 		///
	litReviewScore qualityIndexScore

* Loop over all fields and run the potential / competition regressions
forval i = 0/10 {

	matrix results[`row', `col'] = `i'
	local ++col

	foreach y of local fig11_outcomes {

		* competition is regressed on the potential treatment; every other
		* outcome is regressed on the competition treatment
		if "`y'" == "competition" {
			local treat highPotential
		}
		else {
			local treat highCompetition
		}

		* Treatment coefficient with its confidence bounds for this field
		reg `y' `treat' if field == `i', r
		matrix results[`row', `col'] = _b[`treat']
		local ++col
		matrix results[`row', `col'] = r(table)[5,1]
		local ++col
		matrix results[`row', `col'] = r(table)[6,1]
		local ++col

		* Outcome mean among untreated respondents in this field
		sum `y' if field == `i' & `treat' == 0
		matrix results[`row', `col'] = r(mean)
		local ++col
	}

	* Next field: move down one row and back to the first column
	local ++row
	local col = 1
}

* Use regression results to make coefficient plots: svmat turns the 11 x 37
* matrix into variables results1-results37, one observation per field

preserve

clear
svmat results

* Column 1 of the matrix is the field id
rename results1 field

* Effect of potential on competition (results columns 2-5)
rename (results2 results3 results4 results5) 								///
	(comp comp_lb comp_ub comp_mean)

* Layers: a grey rectangle spanning x = -0.5 to 1.5 (behind fields 0 and 1),
* CI caps, then the point estimates
twoway 																		///
	(scatteri 23 -0.5 23 1.5 -3 1.5 -3 -0.5, recast(area) color(gs14)) 		///
	(rcap comp_lb comp_ub field, lcolor(navy)) 								///
	(scatter comp field, mcolor(navy)), 									///
	ylabel(, angle(h)) xscale(r(-1(1)11)) 									///
	xlabel(0 "structural biology PDB" 1 "structural biology all" 			///
		2 "cell biology" 3 "ecology" 4 "horticulture" 						///
		5 "immunology" 6 "biochemistry" 7 "inorganic chemistry" 			///
		8 "cond. matter physics" 9 "optics" 10 "social psychology", 		///
		tlcolor(none) angle(45) nogrid) 									///
	ylab(,nogrid) ytitle("Coefficient on high potential") yline(0) 			///
	ylab(0(5)20) name(comp, replace) legend(off) xtitle("Field") 			///
	title("Panel A: Effect of high potential on competition (probability of competitor)")

* Effect of competition on maturation (results columns 6-9)
rename (results6 results7 results8 results9) 								///
	(maturation maturation_lb maturation_ub maturation_mean)

* Layers: a grey rectangle spanning x = -0.5 to 1.5 (behind fields 0 and 1),
* CI caps, then the point estimates
twoway 																		///
	(scatteri 2 -0.5 2 1.5 -8 1.5 -8 -0.5, recast(area) color(gs14)) 		///
	(rcap maturation_lb maturation_ub field, lcolor(navy)) 					///
	(scatter maturation field, mcolor(navy)), 								///
	ylabel(, angle(h)) xscale(r(-1(1)11)) 									///
	xlabel(0 "structural biology PDB" 1 "structural biology all" 			///
		2 "cell biology" 3 "ecology" 4 "horticulture" 						///
		5 "immunology" 6 "biochemistry" 7 "inorganic chemistry" 			///
		8 "cond. matter physics" 9 "optics" 10 "social psychology", 		///
		tlcolor(none) angle(45) nogrid) 									///
	ylab(,nogrid) ytitle("Coefficient on high competition") yline(0) 		///
	ylab(2(2)-8) name(mat, replace) legend(off) xtitle("Field") 			///
	title("Panel B: Effect of high competition on maturation (months to complete)")

* Effect of competition on quality index (results columns 34-37)
rename (results34 results35 results36 results37) 							///
	(index index_lb index_ub index_mean)

* Layers: a grey rectangle spanning x = -0.5 to 1.5 (behind fields 0 and 1),
* CI caps, then the point estimates
twoway 																		///
	(scatteri 0.15 -0.5 0.15 1.5 -0.17 1.5 -0.17 -0.5, 						///
		recast(area) color(gs14)) 											///
	(rcap index_lb index_ub field, lcolor(navy)) 							///
	(scatter index field, mcolor(navy)), 									///
	ylabel(, angle(h)) xscale(r(-1(1)11)) 									///
	xlabel(0 "structural biology PDB" 1 "structural biology all" 			///
		2 "cell biology" 3 "ecology" 4 "horticulture" 						///
		5 "immunology" 6 "biochemistry" 7 "inorganic chemistry" 			///
		8 "cond. matter physics" 9 "optics" 10 "social psychology", 		///
		tlcolor(none) angle(45) nogrid) 									///
	ylab(,nogrid) yline(0) ylab(-0.15(.05)0.15) name(index, replace) 		///
	legend(off) xtitle("Field") ytitle("Coefficient on high competition") 	///
	title("Panel C: Effect of high competition on quality index")

* Stack the three named panels vertically and write Figure 11 to disk
graph combine comp mat index, row(3) xsize(4) ysize(8)

graph save "${figures}figure11.gph", replace
graph export "${figures}figure11.pdf", replace

* Bring back the survey data that was preserved before svmat
restore
